# author: Han Zhang
library(ggplot2)
library(reshape2)



# Load the placebo keyword-search counts. The file is tab-separated despite
# its .csv extension, hence the explicit `sep`.
dp <- read.csv("keyword_search_placebo_count.csv", sep = "\t",
               stringsAsFactors = FALSE)
# Drop the rows that belong to the rescaled 50-keyword series.
dp <- dp[dp$Dataset != "Posts with 50 keywords (in hundreds)", ]

# Keep columns 1, 2, 5 and 4 (in that order) and name the second one "count".
dp <- dp[, c(1, 2, 5, 4)]
names(dp)[2] <- "count"
# Characters 6-8 of the month string, converted to a number.
# NOTE(review): despite the column name this looks like the month of the year
# (assuming `month` is formatted "YYYY-MM") - confirm.
dp$quarter <- as.numeric(substr(dp$month, 6, 8))



##########################################################

# new event data
# Load the CASM protest posts (tab-separated despite the .csv extension).
df <- read.csv("../../output/protest_posts.csv", sep = "\t",
               stringsAsFactors = FALSE)

# The text before the first "_" in post_id is parsed as the post date.
# vapply() guarantees a character vector even for empty input.
# NOTE(review): assumes that prefix is a date as.Date() can parse
# (e.g. "YYYY-MM-DD") - confirm against the data.
df$date <- as.Date(vapply(strsplit(df$post_id, "_"), "[", character(1), 1))

# First seven characters of the printed date, i.e. "YYYY-MM".
df$month <- substr(as.character(df$date), 1, 7)

# Number of posts per month; `month` comes back as a factor here.
df_month_count <- as.data.frame.table(table(df$month))
colnames(df_month_count) <- c("month", "count")
df_month_count$Dataset <- "CASM"

# Console preview of every month except the last one. The result is not
# assigned, so the table itself is unchanged by this line.
head(df_month_count, -1)

# Characters 6-8 of the month label as a number.
# NOTE(review): despite the column name this is the month of the year.
df_month_count$quarter <- as.numeric(substr(df_month_count$month, 6, 8))

# Exclude July 2017 from the series.
df_month_count <- df_month_count[df_month_count$month != "2017-07", ]

# Console preview: months ranked by post count (not stored).
df_month_count[order(df_month_count$count, decreasing = TRUE), ]

# The ratio below pairs the two tables purely by row position, so flag the
# case where they do not list the same months in the same order. This only
# warns; the computed values are the same as before.
if (!identical(as.character(df_month_count$month), as.character(dp$month))) {
  warning("CASM and placebo tables do not list the same months in the same ",
          "order; the row-wise ratio may be misaligned.", call. = FALSE)
}

# Ratio of CASM post counts to placebo post counts, row by row.
gap <- data.frame(ratio = df_month_count$count / dp$count,
                  month = df_month_count$month)

# Console preview: months ranked by ratio (not stored).
gap[order(gap$ratio, decreasing = TRUE), ]

# Month of the year (see the note on the column name above).
gap$quarter <- as.numeric(substr(gap$month, 6, 8))
# Numeric x position for plotting: the integer code of each month's factor
# level. NOTE(review): unused levels are not dropped, so a removed month that
# is not the last level would leave a hole in this sequence - confirm.
gap$month_numeric <- as.numeric(as.factor(gap$month))


# Axis label for a month number.
#
# x: a single month number. Months 1, 4, 7 and 10 (the first month of each
#    quarter) map to "Jan", "Apr", "Jul" and "Oct".
# y: accepted so existing two-argument calls keep working; it is never used.
#
# Returns a length-one character string. Every other value of x - including
# NA and numbers outside 1..12 - yields "", so the result type is always
# character and mapply()/vapply() callers get a plain character vector.
quar <- function(x, y) {
  quarter_starts <- c(1, 4, 7, 10)
  quarter_names <- c("Jan", "Apr", "Jul", "Oct")

  # match() returns NA for anything that is not a quarter-start month.
  idx <- match(x, quarter_starts)
  if (is.na(idx)) {
    return("")
  }
  quarter_names[idx]
}

# Four-character year prefix of each month label.
gap[["year"]] <- substr(gap[["month"]], 1, 4)
# Replace the numeric `quarter` column with the text label produced by quar()
# for each row; the same labels are kept separately for the x-axis annotation.
quarter_label <- mapply(quar, gap[["quarter"]], gap[["ratio"]])
gap[["quarter"]] <- quarter_label


# Draw the monthly CASM / placebo ratio and write it to a PDF.
cairo_pdf("CASM_divide_irrelevant.pdf", width = 12, height = 8)

# One label per distinct year, in order of first appearance.
year_labels <- unique(gap$year)

g1 <- ggplot(gap, aes(x = month_numeric, y = ratio)) +
  geom_line() +
  geom_smooth() +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  theme_bw() +
  # No grid lines and no built-in x axis: the x labels are drawn by hand
  # below. The enlarged bottom margin leaves room for them.
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.margin = unit(c(1, 1, 4, 1), "lines"),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(size = 18, colour = "black"),
        axis.title.y = element_text(size = 20, colour = "black")) +
  ylab("Number of CASM Protest Posts\n /\n Number of Protest Irrelevant Posts") +
  xlab("Month") +
  # Hand-placed x-axis labels: a quarter label under each month position...
  annotate(geom = "text", x = seq_along(quarter_label), y = -0.5,
           label = quarter_label, size = 5) +
  # ...and one year label per distinct year, starting at x = 8.
  # NOTE(review): the step of 11 between year labels (rather than 12) is kept
  # from the original layout - confirm it is intended.
  annotate(geom = "text", x = 8 + 11 * (seq_along(year_labels) - 1), y = -0.3,
           label = year_labels, size = 6)

# Switch off panel clipping so text positioned outside the panel area is
# still drawn.
g2 <- ggplot_gtable(ggplot_build(g1))
g2$layout$clip[g2$layout$name == "panel"] <- "off"
grid::grid.draw(g2)


dev.off()